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* This function computes the Levenshtein distance of two strings 

* 

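* @param s the first string 
* @param t the second string 
* @param n length of s 
* @param m length of t 
* @return the index of the first differing character, where the matrix 
*         computation starts (0 when the computation is skipped); the 
*         distance itself is stored in levenshteinDist[n][m] 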

*/ 

public static int levenshteinComputations(String s, 
K String t, 

intn, 

int m) { 

inti . //iterates through s 

in* i- //iterates through t 

' t II used to initialize s and t 

char'sj; // ith character of s 

int jNext; 

int iNext; 

int prevJVal; 

int startAt = 0; 

int a, b, c, temp; 

boolean substnngBroken = false, 

// Levenshtein matrix , v 
//intGQ levenshteinDist = retumlnrtMatnx(n+1 , m+l), 

// Step 1 (takes care of a null string) 

' f ^sffiDiJn?m| = (n = 0)?m:n; 
return 0; 

} 

// Step 1 .5 (eliminate the common initial string in the strings) 
temD = (m < n)?m:n; . f 

for (i = 0- (i < temp) && IsubstringBroken; i++) { 
if (s.charAt(i) != t.charAt(i)) { 

. startAt = i; 
substringBroken = true; 

} 

} 

if (IsubstringBroken && i == temp) { 
//FteKm Immediately if one string is completely contained in 

te^enshteinDist[n][m] = (m > n)?(m - n):(n - m); 
return startAt; 

} 
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// Step 2 (initialize the elements of the matrix) 
for (k = 0, i = startAt; i <= n; i++) 
levenshteinDist[i][startAt] = k++; 

for (k = 0, j = startAt; j < m; j++) { 
levenshteinDist[startAt][j] = k++; 
tBufferO] = t.charAt(j); 

} 

levenshteinDist[startAt][m] = k; 

// Step 3 (perform the computation) 
for (i = startAt; i < n;) { 

sj = s.charAt(i); 

iNext = i+1; 

// optimization: minimize array references by setting the 

// temporary variable prevJVal. Set the previous j value to the 

//value at levenshtein[iNext][startAt] in order to initialize it. 

// (see initialization at Step 2) 

prevJVal = levenshteinDist[iNext][startAt]; 

// Step 4 

for (j = startAt; j < m;) { 
jNext = j+1 ; 

a = levenshteinDist[i][jNext]+1 ; 

// b = levenshteinDist[iNext][j]+1 ; 
b = prevJVal+1; 

// Step 5 

c = (sj == tBufferO])?levenshteinDist[i]0]:levenshteinDist[i]0]+1; 
// Step 6 

temp = (a < b)?a:b; 

levenshteinDist[iNext][jNext] = prevJVal = (c < temp)?c:temp; 
j= jNext; 

} 

i = iNext; 

} 

// Step 7 (return the levenshtein matrix and the starting position) 
return startAt; 

} 
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j-k-k 

* This function computes the largest common substring score of two strings 
* 
* @param s the first string 
* @param t the second string 
*/ 

prevStringl = s; 
prevString2 = t; 

previousLevScore = 1 - (((float) (distance « 1))/ 
((float) (lengths + lengthT))); 

returnVal += previousLevScore; 

if (HargeLengthDiff) { 

// Calculate the substring score if this is the case 
if (returnVal > 0 && returnVal < maxScore) { 

int currMaxLength = 0; 

int k, I; 

int currlteration; 
int d1; 

int dljength; 
int d2; 

// Loop through the rows, then the columns 
for (k = startAt; k <= lengthS; k++) { 

currlteration = k; 

d1 =0; 

d1_length = 0; 

for (I = startAt; 
(I <= lengthT) && (currlteration <= lengthS); 
!++){ 

d2 = levenshteinDist[currlteration][l]; 

if(d1 = d2) 

d1Jength++; 
else 

dljength = 1; 

d1 =d2; 
currlteration++; 

} 

if (dljength > currMaxLength) 
currMaxLength = dljength; 

} 
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// Loop through the columns then the rows 
for (I = startAt; I <= lengthT; I++) { 

currlteration = I; 

d1 =0; 

d1_length = 0; 

for (k = startAt; 
(k <= lengths) && (currlteration <= lengthT); 
k++){ 

d2 = levenshteinDist[k][currlteration]; 

if (d1 == d2) 

d1_length++; 
else 

d1_length = 1; 

d1 = d2; 
currlteration++; 

} 

if (dljength > currMaxLength) 
currMaxLength = dljength; 



// Make sure that the matching substring is not the 
// initial match 

if (startAt > currMaxLength) { 

currMaxLength = startAt; 
} else { 

currMaxLength-; 

} 
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I** 

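* Takes two strings and gets a score based on their acronyms 
* 
* @param str1 first string 
* @param str2 second string 
* @param partialMatch the score to return when the shorter acronym is a 
*        prefix of the longer one 
* @param exactMatch the score to return when both acronyms are identical 
* @return the score of comparison between the acronyms 
*/ 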

public static float scoreAcronyms(String strl , 

String str2, 

float partial Match, 

float exactMatch) { 
int acrl Length = 1 ; 
int acr2Length = 1 ; 
int strl Length = strl .length(); 
int str2Length = str2.length(); 
int minLength, i; 

if (strl == null II str2 = null) { 
return 0; 

} 

// get the acronym representation of string 1 
acr1[0] = str1.charAt(0); 
for (i = 1 ; i < strl Length; i++) { 
if (strl .charAt(i) = ' ' && (++i) < strl Length) 
acrl [acrl Length++] = strl .charAt(i); 



// if there is only one word, copy the entire string into the acronym 
if (acrl Length == 1){ 
for (i = 1 ; i < strl Length; i++) { 
acr1[acr1 Length++] = strl .charAt(i); 



// get the acronym representation of string 2 
acr2[0] = str2.charAt(0); 
for (i = 1 ; i < str2Length; i++) { 
if (str2.charAt(i) == 1 1 && (++i) < str2Length) 
acr2[acr2Length++] = str2.charAt(i); 



// if there is only one word, copy the entire string into the acronym 
// this allows us to match already-acronymized names to non-acronymized 
// strings (e.g., ge = general electric) 
if (acr2Length = 1){ 
for (i = 1 ; i < str2Length; i++) { 
acr2[acr2Length++] = str2.charAt(i); 
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// see how equal the acronyms are. 

minLength = (acrl Length > acr2Length)?acr2Length:acr1 Length; 
for (i = 0; (i < minLength) && (acr1[i] == acr2[i]); i++) {} 



// give the acronyms a non-zero score only if the loop above completed, 
if (j == minLength) 

return (acrl Length = acr2Length)?exactMatch:partialMatch; 
else 

return 0; 

} 
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r 

* Returns a consolidated Levenshtein, Substring, and Acronym score 
* 
* @param s the first string 
* @param t the second string 
* @param maxScore the maximum allowable returnable score 
* @param m the multipliers to use 
* @return the consolidated score of both these substrings 
*/ 

public static float consolidatedScore(String s, 
String t, 

float maxScore) { 

float retumVal = 0; 

// If the strings are equal, we are done, just return the max score 
if (s.equals(t)) 

return maxScore; 
else{ 

// Previous strings have been cached to save computations 
if (prevStringl.equals(s) && prevString2.equals(t)) { 

retumVal = maxScore; 
} else { 

// Set the lengths we're going to use for computations, 
int distance; 

int lengths = s.length(); 
int lengthT = t.length(); 

int longerLength = (lengths > lengthT)?lengthS:lengthT; 
int shorterLength = (lengths > lengthT)?lengthT:lengthS; 
intstartAt; 

boolean largeLengthDiff; 

// If the lowest among top 25 scores is less than 0, then see 

// if we can just approximate the levenshtein distance 

if (largeLengthDiff = (longerLength > (shorterLength « 2))) { 

distance = longerLength - shorterLength + 1 ; 

startAt = 0; 
} else { 

startAt = levenshteinComputations(s, 
t, 

lengths, 
lengthT); 

distance = levenshteinDist[lengthS][lengthT]; 

} 
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// Compute and scale the substring score, add it to the 
// returned value. 

retumVal += (((float) currMaxLength) / 

((float) ((lengths > lengthT)?lengthS:lengthT))); 

} 

} 

} 

// Scale down because both Levenshtein and Substring are out of one 
retumVal *= 0.5; 
return retumVal; 
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